Random KFG Bin Sample:
— Pendant Pendant Sum Monte Carlo Simulation

1. Creating the distributions

1.1 The Existing FieldGuide Distribution

The first dataframe to build is a database of sums using the khipus in the existing KFG.

Code

import math
import random
from random import choices

import numpy as np
import pandas as pd
import khipu_kamayuq as kamayuq  # A Khipu Maker is known (in Quechua) as a Khipu Kamayuq
import khipu_qollqa as kq
from pandas import Series, DataFrame

# Plotly
import plotly
from plotly.offline import iplot, init_notebook_mode
import plotly.graph_objs as go
import plotly.express as px
import plotly.figure_factory as ff
# Initialise plotly's offline notebook mode once, while the imports are being set up.
plotly.offline.init_notebook_mode(connected = False)

from monte_carlo import DiscreteDistributionSampler, PendantSummer, StrawmanKhipu

Code

# fetch_khipus() returns a pair; only the second element (all_khipus) is used in this cell.
khipu_dict, all_khipus = kamayuq.fetch_khipus()

# Wrap every KFG khipu in a StrawmanKhipu built from the knotted values of its pendant cords.
strawmen_kfg_khipu = [
    StrawmanKhipu(
        khipu.name(),
        "KFG",
        [cord.knotted_value() for cord in khipu.pendant_cords()],
    )
    for khipu in all_khipus
]

# One dataframe row per strawman khipu, using the column names supplied by StrawmanKhipu.
kfg_rows = [strawman.dataframe_tuple() for strawman in strawmen_kfg_khipu]
strawmen_kfg_df = pd.DataFrame(kfg_rows, columns=StrawmanKhipu.dataframe_columns())
strawmen_kfg_df.head()

	name	source	num_pendants	mean_cord_value	stdev_cord_value	num_right_sums	num_left_sums	num_sums	mean_num_summands	stdev_num_summands	mean_sum_value	stdev_sum_value	num_sums_per_nonzero_pendant	mean_right_handedness	stdev_right_handedness	mean_left_handedness	stdev_left_handedness
0	AS010	KFG	27	8	10.392305	3	2	5	3.0	1.000000	22.0	9.695360	0.227273	4.333333	0.577350	-6.5	4.949747
1	AS011	KFG	15	92	183.904867	0	0	0	0.0	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.0	0.000000
2	AS012	KFG	85	2	5.196152	3	4	7	6.0	2.449490	18.0	7.348469	0.233333	10.333333	6.027714	-9.0	5.099020
3	AS013	KFG	90	4	14.456832	0	5	5	14.0	14.456832	48.0	44.508426	0.121951	0.000000	0.000000	-19.0	8.860023
4	AS014	KFG	42	53	40.137264	1	2	3	2.0	0.000000	99.0	7.000000	0.071429	17.000000	0.000000	-15.0	4.242641

Code

# Total number of right-handed and left-handed sums over all existing KFG khipus.
total_right_sums = int(strawmen_kfg_df["num_right_sums"].sum())
total_left_sums = int(strawmen_kfg_df["num_left_sums"].sum())
total_sums = total_right_sums + total_left_sums

# Share of sums on each side as a whole percentage; 0 when there are no sums at all.
left_pct = round(100.0 * total_left_sums / total_sums) if total_sums > 0 else 0
right_pct = round(100.0 * total_right_sums / total_sums) if total_sums > 0 else 0

# Mean and standard deviation, across khipus, of the per-khipu mean handedness (1 decimal).
left_handed_mean = round(strawmen_kfg_df["mean_left_handedness"].mean(), 1)
right_handed_mean = round(strawmen_kfg_df["mean_right_handedness"].mean(), 1)
left_handed_stdev = round(strawmen_kfg_df["mean_left_handedness"].std(), 1)
right_handed_stdev = round(strawmen_kfg_df["mean_right_handedness"].std(), 1)

# The counts are printed as a plain right/left ratio (no `=` debug specifier in the f-string).
print(f"Existing KFG - Right/Left Distribution = {right_pct}%/{left_pct}% ({total_right_sums}/{total_left_sums})")
print(f"             - Right/Left Mean Handedness = {right_handed_mean}/{left_handed_mean} ±({right_handed_stdev}/{left_handed_stdev})")

strawmen_kfg_df.describe()

Existing KFG - Right/Left Distribution = 54%/46% (4354/total_left_sums=3734)
             - Right/Left Mean Handedness = 9.9/-8.5 ±(15.0/14.2)

	num_pendants	mean_cord_value	stdev_cord_value	num_right_sums	num_left_sums	num_sums	mean_num_summands	stdev_num_summands	mean_sum_value	stdev_sum_value	num_sums_per_nonzero_pendant	mean_right_handedness	stdev_right_handedness	mean_left_handedness	stdev_left_handedness
count	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000
mean	64.109231	309.835385	496.113591	6.698462	5.744615	12.443077	3.156923	2.032227	146.203077	70.992234	0.160933	9.914679	6.387789	-8.511022	5.777778
std	102.325678	1637.638528	2778.219238	13.630774	12.081363	25.319237	3.620689	3.925252	1052.163975	224.887696	0.179440	15.025712	12.766330	14.176137	11.853944
min	1.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	-123.303371	0.000000
25%	14.000000	5.000000	7.071068	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	-11.555556	0.000000
50%	32.000000	19.500000	26.267844	1.000000	1.000000	2.000000	2.000000	0.000000	25.000000	5.291503	0.105823	5.666667	0.000000	-3.333333	0.000000
75%	77.750000	78.750000	134.914450	6.000000	5.000000	11.750000	5.000000	2.828427	67.000000	41.225340	0.250000	13.925926	7.771955	0.000000	7.681053
max	1650.000000	26324.000000	42206.089632	105.000000	101.000000	200.000000	27.000000	37.094474	24072.000000	3210.316028	0.823009	150.525773	129.388714	0.000000	117.441998

Of interest: for the existing KFG, the mean handedness is roughly 9 to 10 cords, and it is asymmetric (right-handed mean 9.9, left-handed mean -8.5). In other words, the middle of the summands, by index, is on average about 9 cords away from the sum cord. There are 0.16 sums per nonzero pendant.

1.2 Strawman Khipus based on a Random KFG Distribution

Next, we make a dataframe based on randomly generated khipus that has:

The same number of khipus as the KFG
A pendant cord count chosen randomly from the existing khipus’ pendant cord counts
Pendant values that are randomly chosen from the existing KFG cord values.

Code

# Pendant-cord count of every KFG khipu; this is the pool sample_kfg_num_cords() draws from.
cords_per_khipu = [khipu.num_pendant_cords() for khipu in all_khipus]


def sample_kfg_num_cords():
    """Return a pendant-cord count drawn at random from the KFG khipus.

    One entry of ``cords_per_khipu`` is picked with ``random.choices``.
    A draw below 3 is raised to 3 so that no trivial khipu is produced.
    """
    (drawn,) = choices(cords_per_khipu, k=1)
    return max(drawn, 3)

# Produce exactly as many random strawman khipus as there are khipus in the KFG.
num_dummy_khipus = len(all_khipus)

# Pool of every strictly positive pendant-cord value found in the KFG.
cord_values = []
for khipu in all_khipus:
    cord_values.extend(
        cord.knotted_value() for cord in khipu.pendant_cords() if cord.knotted_value() > 0
    )
sampler = DiscreteDistributionSampler(cord_values)

random_names = [f"rkfg_khipu_{i:05d}" for i in range(num_dummy_khipus)]
strawmen_rkfg_khipu = []
for khipu_name in random_names:
    # Draw a cord count first, then that many cord values, rounded to whole numbers.
    num_cords = sample_kfg_num_cords()
    random_cords = [round(value) for value in sampler.bin_sample(num_cords)]
    strawmen_rkfg_khipu.append(StrawmanKhipu(khipu_name, "runif", random_cords))

# One dataframe row per random strawman khipu.
rkfg_rows = [strawman.dataframe_tuple() for strawman in strawmen_rkfg_khipu]
strawmen_rkfg_df = pd.DataFrame(rkfg_rows, columns=StrawmanKhipu.dataframe_columns())
strawmen_rkfg_df.head()

	name	source	num_pendants	mean_cord_value	stdev_cord_value	num_right_sums	num_left_sums	num_sums	mean_num_summands	stdev_num_summands	mean_sum_value	stdev_sum_value	num_sums_per_nonzero_pendant	mean_right_handedness	stdev_right_handedness	mean_left_handedness	stdev_left_handedness
0	rkfg_khipu_00000	runif	32	417.312500	1433.013145	0	4	4	4.0	4.472136	120.250000	183.301891	0.125000	0.0	0.000000	-12.25	9.742518
1	rkfg_khipu_00001	runif	19	54.578947	112.914085	2	0	2	5.0	2.828427	214.500000	262.336616	0.105263	6.5	2.121320	0.00	0.000000
2	rkfg_khipu_00002	runif	10	105.000000	215.724104	0	0	0	0.0	0.000000	0.000000	0.000000	0.000000	0.0	0.000000	0.00	0.000000
3	rkfg_khipu_00003	runif	27	214.074074	683.023703	3	0	3	6.0	6.403124	947.333333	1604.481640	0.111111	11.0	4.582576	0.00	0.000000
4	rkfg_khipu_00004	runif	27	84.185185	143.927876	0	2	2	3.0	2.000000	50.500000	50.204581	0.074074	0.0	0.000000	-15.00	7.071068

Code

# Total number of right-handed and left-handed sums over all random strawman khipus.
total_right_sums = int(strawmen_rkfg_df["num_right_sums"].sum())
total_left_sums = int(strawmen_rkfg_df["num_left_sums"].sum())
total_sums = total_right_sums + total_left_sums
# Raw totals, printed in name=value form.
print(f"{total_right_sums=} {total_left_sums=}")

# Share of sums on each side as a whole percentage; 0 when there are no sums at all.
left_pct = round(100.0 * total_left_sums / total_sums) if total_sums > 0 else 0
right_pct = round(100.0 * total_right_sums / total_sums) if total_sums > 0 else 0

# Mean and standard deviation, across khipus, of the per-khipu mean handedness (1 decimal).
left_handed_mean = round(strawmen_rkfg_df["mean_left_handedness"].mean(), 1)
right_handed_mean = round(strawmen_rkfg_df["mean_right_handedness"].mean(), 1)
left_handed_stdev = round(strawmen_rkfg_df["mean_left_handedness"].std(), 1)
right_handed_stdev = round(strawmen_rkfg_df["mean_right_handedness"].std(), 1)

# The counts are printed as a plain right/left ratio (no `=` debug specifier in the f-string).
print(f"Random KFG - Right/Left Distribution = {right_pct}%/{left_pct}% ({total_right_sums}/{total_left_sums})")
print(f"           - Right/Left Mean Handedness = {right_handed_mean}/{left_handed_mean} ±({right_handed_stdev}/{left_handed_stdev})")

strawmen_rkfg_df.describe()

total_right_sums=10610 total_left_sums=10514
Random KFG - Right/Left Distribution = 50%/50% (10610/total_left_sums=10514)
           - Right/Left Mean Handedness = 12.5/-12.3 ±(13.0/13.0)

	num_pendants	mean_cord_value	stdev_cord_value	num_right_sums	num_left_sums	num_sums	mean_num_summands	stdev_num_summands	mean_sum_value	stdev_sum_value	num_sums_per_nonzero_pendant	mean_right_handedness	stdev_right_handedness	mean_left_handedness	stdev_left_handedness
count	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000
mean	65.656923	337.083721	1250.850541	16.323077	16.175385	32.498462	2.735385	2.076469	92.076141	199.379435	0.237186	12.543546	9.248155	-12.319211	9.381162
std	121.786639	1671.626348	5118.629259	49.004263	48.696127	97.598218	1.918399	2.563036	132.220616	416.205046	0.212900	12.955093	12.735598	12.971127	12.907701
min	3.000000	4.666667	3.932768	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	-75.119883	0.000000
25%	12.000000	75.766026	147.210025	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	-20.099696	0.000000
50%	32.000000	157.701208	468.338420	3.000000	3.000000	6.000000	3.000000	1.414214	59.972973	50.325395	0.194444	10.000000	5.191793	-9.083333	4.405330
75%	80.000000	299.861423	1069.302015	15.000000	15.000000	29.750000	4.000000	3.162278	130.392157	220.179981	0.387399	20.237500	13.676254	0.000000	15.679902
max	1650.000000	40258.000000	113249.594284	702.000000	712.000000	1414.000000	8.000000	14.035669	1739.000000	4594.792355	0.856970	74.497151	96.889289	0.000000	100.346936

2. Random KFG vs. Existing KFG - Graphical Distribution

To compare the distributions of the random khipus with those of the existing khipus graphically, the two dataframes are first combined into a single dataframe:

Code

def source_color(x):
    """Map a source label to a colour-scale value: 0.0 for "KFG", 1.0 for anything else."""
    if x == "KFG":
        return 0.0
    return 1.0

# Stack the existing-KFG rows and the random-KFG rows into one dataframe,
# then tag every row with the numeric colour value of its source label.
combined_kfg_rkfg_df = pd.concat(
    [strawmen_kfg_df, strawmen_rkfg_df],
    axis=0,
)
combined_kfg_rkfg_df['source_color'] = list(
    map(source_color, combined_kfg_rkfg_df['source'].values)
)

Code

# Left/right sum totals for the existing KFG khipus.
kfg_left = sum(strawmen_kfg_df["num_left_sums"].tolist())
kfg_right = sum(strawmen_kfg_df["num_right_sums"].tolist())
kfg_total = kfg_left + kfg_right
if kfg_total > 0:
    pct_kfg_left = round(100.0 * float(kfg_left) / float(kfg_total))
    pct_kfg_right = round(100.0 * float(kfg_right) / float(kfg_total))
else:
    # No sums at all: report 0% on both sides instead of dividing by zero.
    pct_kfg_left = pct_kfg_right = 0

# Left/right sum totals for the random strawman khipus.
rkfg_left = sum(strawmen_rkfg_df["num_left_sums"].tolist())
rkfg_right = sum(strawmen_rkfg_df["num_right_sums"].tolist())
rkfg_total = rkfg_left + rkfg_right
if rkfg_total > 0:
    pct_rkfg_left = round(100.0 * float(rkfg_left) / float(rkfg_total))
    pct_rkfg_right = round(100.0 * float(rkfg_right) / float(rkfg_total))
else:
    pct_rkfg_left = pct_rkfg_right = 0

print(f"Num Right/Left Sums for Existing KFG:{kfg_right}/{kfg_left} ({pct_kfg_right}%/{pct_kfg_left}%)")
print(f"Num Right/Left Sums for Random KFG: {rkfg_right}/{rkfg_left} ({pct_rkfg_right}%/{pct_rkfg_left}%)")

Num Right/Left Sums for Existing KFG:4354/3734 (54%/46%)
Num Right/Left Sums for Random KFG: 10610/10514 (50%/50%)

Code

# Log-log scatter of right-handed vs. left-handed sum counts, one marker per khipu.
# Colour follows source_color (0.0 = KFG = blue, 1.0 = random = red); marker size is mean_num_summands.
legend_text = "<b>Random KFG vs Existing KFG - #Sums:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Summands</i>"
fig = px.scatter(
    combined_kfg_rkfg_df, x="num_right_sums", y="num_left_sums", log_y=True, log_x=True,
    size="mean_num_summands",
    opacity=.4,
    color='source_color', color_continuous_scale=['#3c3fff', '#ff3030'],
    labels={"name": "Khipu Name"},
    hover_data=['name'], title=legend_text,
    width=944, height=944,
)
# Hide the legend and the colour bar; the title already spells out the colour coding.
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
# show() returns None, so it is called on its own line to keep `fig` bound to the figure.
fig.show()

As expected, small sums occur more often in the random khipus. They also have fewer summands. Let’s evaluate the number of summands for the random khipus vs. the existing khipus.

Code

# Scatter of mean summands per sum (linear x axis) vs. number of sums (log y axis), one marker per khipu.
# Colour follows source_color (0.0 = KFG = blue, 1.0 = random = red); marker size is num_sums_per_nonzero_pendant.
legend_text = "<b>Random KFG vs Existing KFG - #Sums vs #Summands:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Sums/Pendant</i>"
fig = px.scatter(
    combined_kfg_rkfg_df, x="mean_num_summands", y="num_sums", log_y=True,
    size="num_sums_per_nonzero_pendant",
    opacity=.4,
    color='source_color', color_continuous_scale=['#3c3fff', '#ff3030'],
    labels={"name": "Khipu Name"},
    hover_data=['name'], title=legend_text,
    width=944, height=944,
)
# Hide the legend and the colour bar; the title already spells out the colour coding.
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
# show() returns None, so it is called on its own line to keep `fig` bound to the figure.
fig.show()

This echoes the previous statement about the number of summands being very different in the Random KFG set! A relatively clear separation occurs.

Code

# Scatter of mean left-handedness vs. mean right-handedness, one marker per khipu.
# Colour follows source_color (0.0 = KFG = blue, 1.0 = random = red); marker size is mean_num_summands.
legend_text = "<b>Random KFG vs Existing KFG - Sum Handedness:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Summands</i>"
fig = px.scatter(
    combined_kfg_rkfg_df, x="mean_left_handedness", y="mean_right_handedness",
    size="mean_num_summands",
    opacity=0.3,
    color='source_color', color_continuous_scale=['#3c3fff', '#ff3030'],
    labels={"name": "Khipu Name"},
    hover_data=['name'], title=legend_text,
    width=944, height=944,
)
# Hide the legend and the colour bar; the title already spells out the colour coding.
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
# show() returns None, so it is called on its own line to keep `fig` bound to the figure.
fig.show()

Existing KFG khipus tend to have their sums close, for obvious reasons. However, the randomly generated khipus have many more far sums, with a small number of overall summands.

Code

# Scatter of sums per nonzero pendant vs. mean sum value (log y axis), one marker per khipu.
# Colour follows source_color (0.0 = KFG = blue, 1.0 = random = red); marker size is num_pendants.
legend_text = "<b>Random KFG vs Existing KFG - Mean Sum vs #Sums/Pendant:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Pendant</i>"
fig = px.scatter(
    combined_kfg_rkfg_df, x="num_sums_per_nonzero_pendant", y="mean_sum_value", log_y=True,
    size="num_pendants",
    opacity=0.5,
    color='source_color', color_continuous_scale=['#3c3fff', '#ff3030'],
    labels={"name": "Khipu Name"},
    hover_data=['name', 'num_sums', 'mean_sum_value'], title=legend_text,
    width=944, height=944,
)
# Hide the legend and the colour bar; the title already spells out the colour coding.
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
# show() returns None, so it is called on its own line to keep `fig` bound to the figure.
fig.show()

3. Frequency Distributions

An examination of frequency distributions for key variables, using violin plots, where width=frequency and height=variable being measured.

Code

# Per-khipu handedness bias: number of right-handed sums minus number of left-handed sums.
combined_kfg_rkfg_df['handedness_bias'] = [
    abs(num_right) - abs(num_left)
    for num_right, num_left in zip(
        combined_kfg_rkfg_df['num_right_sums'].tolist(),
        combined_kfg_rkfg_df['num_left_sums'].tolist(),
    )
]
# Relabel the source column for display: every label other than "KFG" becomes "Random KFG".
combined_kfg_rkfg_df['source'] = [
    "KFG" if source == 'KFG' else "Random KFG"
    for source in combined_kfg_rkfg_df['source'].tolist()
]
legend_text = "<b>Random KFG vs KFG - Handedness Bias (#RightHandedSums - #LeftHandedSums)</b>"
# One violin per source, with every khipu also drawn as an individual point.
fig = px.violin(
    combined_kfg_rkfg_df, y="handedness_bias",
    points='all', color="source",
    hover_data=['name', 'num_sums'], title=legend_text,
    width=944, height=944,
)
# show() returns None, so it is called on its own line to keep `fig` bound to the figure.
fig.show()

Code

legend_text = "<b>Random KFG vs KFG - Log(Mean Cord Value)</b>"
# Natural log of each khipu's mean cord value; non-positive means are mapped to 0
# because math.log is undefined for them.
combined_kfg_rkfg_df['log_mean_cord_value'] = [
    math.log(mean_value) if mean_value > 0 else 0
    for mean_value in combined_kfg_rkfg_df['mean_cord_value'].tolist()
]
# One violin per source, with every khipu also drawn as an individual point.
fig = px.violin(
    combined_kfg_rkfg_df, y="log_mean_cord_value",
    points='all', color="source",
    hover_data=['name', 'num_sums'], title=legend_text,
    width=944, height=944,
)
# show() returns None, so it is called on its own line to keep `fig` bound to the figure.
fig.show()

Code

legend_text = "Violin Plot <b>Random KFG vs Existing KFG -  Log(Sum Mean)</b>"
# Natural log of each khipu's mean sum value; non-positive means are mapped to 0
# because math.log is undefined for them.
combined_kfg_rkfg_df['log_mean_sum'] = [
    math.log(mean_value) if mean_value > 0 else 0
    for mean_value in combined_kfg_rkfg_df['mean_sum_value'].tolist()
]
# One violin per source, with every khipu also drawn as an individual point.
fig = px.violin(
    combined_kfg_rkfg_df, y="log_mean_sum",
    labels={"log_mean_sum": "Log(Sum Mean)"},
    points='all', color="source",
    hover_data=['name', 'num_sums'], title=legend_text,
    width=944, height=944,
)
# show() returns None, so it is called on its own line to keep `fig` bound to the figure.
fig.show()

Code

legend_text = "Violin Plot <b>Random KFG vs Existing KFG - #Sums/Pendant</b>"
# One violin per source for num_sums_per_nonzero_pendant, with every khipu also drawn as a point.
fig = px.violin(
    combined_kfg_rkfg_df, y="num_sums_per_nonzero_pendant",
    points='all', color="source",
    labels={"num_sums_per_nonzero_pendant": "#Sums/Pendant"},
    hover_data=['name', 'num_sums', 'mean_sum_value'], title=legend_text,
    width=944, height=944,
)
# show() returns None, so it is called on its own line to keep `fig` bound to the figure.
fig.show()

Code

legend_text = "<b>Violin Plot - Random KFG vs Existing KFG - #Summands per Sum</b>"
# One violin per source for mean_num_summands, with every khipu also drawn as a point.
fig = px.violin(
    combined_kfg_rkfg_df, y="mean_num_summands",
    points='all', color="source",
    labels={"mean_num_summands": "#Summands per Sum"},
    hover_data=['name', 'num_sums', 'mean_sum_value'], title=legend_text,
    width=944, height=944,
)
# show() returns None, so it is called on its own line to keep `fig` bound to the figure.
fig.show()

This is also what you would expect: randomly generated khipus tend not to have large sums, and they have fewer summands per pendant cord sum.